Time series of monthly maximum rainfall [mm] in a day
library(xts)
library(zoo)
library(tseries)
library(copula)
setwd("/home/davide/universitĂ /tesi magistrale/dati/arpa")
load("only_rain.RData")
print(coordinates)
## station alt lat long
## 1 Brugnera 22 45.91792 12.54500
## 2 Capriva.del.Friuli 85 45.95809 13.51233
## 3 Cervignano.del.Friuli 8 45.84949 13.33701
## 4 Cividale.del.Friuli 127 46.08044 13.42001
## 5 Codroipo 37 45.95236 13.00274
## 6 Enemonzo 438 46.41042 12.86254
## 7 Fagagna 148 46.10169 13.07389
## 8 Fossalon 0 45.71477 13.45886
## 9 Gemona.del.Friuli 184 46.26130 13.12209
## 10 Gradisca.d.Is. 29 45.88979 13.48181
## 11 Musi 600 46.31266 13.27468
## 12 Palazzolo.dello.Stella 5 45.80572 13.05260
## 13 San.Vito.al.Tgl. 21 45.89566 12.81499
## 14 Sgonico.Zgonik 268 45.73800 13.74206
## 15 Talmassons 16 45.88231 13.15779
## 16 Tarvisio 794 46.51078 13.55189
## 17 Udine.S.O. 91 46.03521 13.22667
## 18 Vivaro 142 46.07653 12.76881
knitr::include_graphics("mappa_stazioni_rain.png")
#in separate plots
trace(plot.zoo,
quote(mtext <- function(...) graphics::mtext(..., cex = 0.5, las = 1)))
## [1] "plot.zoo"
plot.zoo(clean_rain_xts, oma = c(6, 5, 5, 0))
## Tracing plot.zoo(clean_rain_xts, oma = c(6, 5, 5, 0)) on entry
untrace(plot.zoo)
Acf plots and ljung box test
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
for (x in colnames(clean_rain_xts)){
acf(ts(clean_rain_xts[,x]), main=x) #plot of the ACF
}
#round function not working
for (x in colnames(clean_rain_xts)){
print(paste0("p-value of Ljung-Box test on rain in ", x , " is:", Box.test(clean_rain_xts[,x], lag = 20, type = "Ljung-Box")$p.value))
}
## [1] "p-value of Ljung-Box test on rain in Brugnera is:0.131982372909309"
## [1] "p-value of Ljung-Box test on rain in Capriva.del.Friuli is:0.342420393409757"
## [1] "p-value of Ljung-Box test on rain in Cervignano.del.Friuli is:0.764437631764927"
## [1] "p-value of Ljung-Box test on rain in Cividale.del.Friuli is:0.298603616354461"
## [1] "p-value of Ljung-Box test on rain in Codroipo is:0.194210840006664"
## [1] "p-value of Ljung-Box test on rain in Enemonzo is:0.0252374442208957"
## [1] "p-value of Ljung-Box test on rain in Fagagna is:0.406266394402289"
## [1] "p-value of Ljung-Box test on rain in Fossalon is:0.325963162941074"
## [1] "p-value of Ljung-Box test on rain in Gemona.del.Friuli is:0.023805779326967"
## [1] "p-value of Ljung-Box test on rain in Gradisca.d.Is. is:0.280495475660849"
## [1] "p-value of Ljung-Box test on rain in Musi is:0.00475603287750948"
## [1] "p-value of Ljung-Box test on rain in Palazzolo.dello.Stella is:0.934749836668725"
## [1] "p-value of Ljung-Box test on rain in San.Vito.al.Tgl. is:0.868452009714319"
## [1] "p-value of Ljung-Box test on rain in Sgonico.Zgonik is:0.840696071596907"
## [1] "p-value of Ljung-Box test on rain in Talmassons is:0.909658489009452"
## [1] "p-value of Ljung-Box test on rain in Tarvisio is:2.14358588460639e-05"
## [1] "p-value of Ljung-Box test on rain in Udine.S.O. is:0.253189905807489"
## [1] "p-value of Ljung-Box test on rain in Vivaro is:0.335901375480045"
for (x in colnames(clean_rain_xts)){
print(paste0("p-value of Ljung-Box test on squared rain in ", x , " is:", Box.test(clean_rain_xts[, x]^2, lag = 20, type = "Ljung-Box")$p.value))
}
## [1] "p-value of Ljung-Box test on squared rain in Brugnera is:0.163294547971732"
## [1] "p-value of Ljung-Box test on squared rain in Capriva.del.Friuli is:0.841589296245896"
## [1] "p-value of Ljung-Box test on squared rain in Cervignano.del.Friuli is:0.445652499691442"
## [1] "p-value of Ljung-Box test on squared rain in Cividale.del.Friuli is:0.926739822293949"
## [1] "p-value of Ljung-Box test on squared rain in Codroipo is:0.256656863284502"
## [1] "p-value of Ljung-Box test on squared rain in Enemonzo is:0.0704492125027978"
## [1] "p-value of Ljung-Box test on squared rain in Fagagna is:0.564285551650741"
## [1] "p-value of Ljung-Box test on squared rain in Fossalon is:0.194653546913393"
## [1] "p-value of Ljung-Box test on squared rain in Gemona.del.Friuli is:0.0215392068379517"
## [1] "p-value of Ljung-Box test on squared rain in Gradisca.d.Is. is:0.29739985117221"
## [1] "p-value of Ljung-Box test on squared rain in Musi is:0.0124915872643815"
## [1] "p-value of Ljung-Box test on squared rain in Palazzolo.dello.Stella is:0.799616200060337"
## [1] "p-value of Ljung-Box test on squared rain in San.Vito.al.Tgl. is:0.986513470431944"
## [1] "p-value of Ljung-Box test on squared rain in Sgonico.Zgonik is:0.67278247569016"
## [1] "p-value of Ljung-Box test on squared rain in Talmassons is:0.793492215144275"
## [1] "p-value of Ljung-Box test on squared rain in Tarvisio is:1.09083383346142e-05"
## [1] "p-value of Ljung-Box test on squared rain in Udine.S.O. is:0.0843148611886035"
## [1] "p-value of Ljung-Box test on squared rain in Vivaro is:0.333677758332719"
Slight seasonality effect on some stations (Tarvisio, Gemona, Enemonzo,… ), but it can be safely ignored. The data can be used directly without passing through residuals, transformation of the data to pseudo-observations, and pairwise scatterplots.
#create the matrices of pseudo-observations (formed from original rain and deseasoned T_maxseries)
clean_rain<-as.matrix(clean_rain_xts)
colnames(clean_rain)<-colnames(clean_rain_xts)
clean_rain<-pobs(clean_rain)
pairs2(clean_rain, cex=0.6)
library(heatmaply)
heatmaply_cor(corKendall(clean_rain), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none")
Exchangeability test done on the lower triangular matrix (all elements of pairs2 below the matrix diagonal) high p-value for almost all (all exchangeable)
which(as.numeric(exchangeability_test_matrix[3,]) < 0.05)
## [1] 35 56 68 112 117 118 126 127 129
The low p-value for exchangeability is for the cases:
exchangeability_test_matrix[,which(as.numeric(exchangeability_test_matrix[3,]) < 0.05)]
## [,1] [,2] [,3]
## [1,] "Cervignano.del.Friuli" "Cividale.del.Friuli" "Codroipo"
## [2,] "Codroipo" "Palazzolo.dello.Stella" "Musi"
## [3,] "0.00949050949050949" "0.0134865134865135" "0.0014985014985015"
## [,4] [,5] [,6]
## [1,] "Gemona.del.Friuli" "Gemona.del.Friuli" "Gradisca.d.Is."
## [2,] "San.Vito.al.Tgl." "Vivaro" "Musi"
## [3,] "0.0024975024975025" "0.0154845154845155" "0.0164835164835165"
## [,7] [,8] [,9]
## [1,] "Musi" "Musi" "Musi"
## [2,] "Palazzolo.dello.Stella" "San.Vito.al.Tgl." "Talmassons"
## [3,] "0.0474525474525475" "0.0044955044955045" "0.0104895104895105"
Looking at data it seems that it is a problem of not enough data rather than non-exchangeability
More or less half of the pairs are radially symmetric
which(as.numeric(radial_test_matrix[3,]) < 0.05)
## [1] 3 4 5 6 7 8 10 11 14 16 19 21 24 25 26 27 30 31 38
## [20] 41 50 53 55 57 60 63 64 66 68 73 76 81 82 84 86 89 91 92
## [39] 93 95 96 104 105 111 112 114 116 117 122 126 127 129 131 138 141 142 144
## [58] 148 150 151 153
#radial_test_matrix[,which(as.numeric(radial_test_matrix[3,]) < 0.05)]
low p-value for all (no extreme value dependence)
which(as.numeric(extreme_value_test_matrix[3,]) > 0.05)
## integer(0)
diagonal independence, upper curve perfect positive dependence. On all there is strong lower tail dependence (“drought” periods are extendend to the whole area), not on all there is a strong upper tail dependence (concurrent extreme rainfall is rarer and occurrs in close stations)
library(VineCopula)
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
heatmaply_cor(lower_tdc, xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none")
Both heatmaps confirm what seen from Kendall distribution function
heatmaply_cor(upper_tdc, xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none")
The lower tdc is high and similar for all couples so it is not useful for clustering. The clustering will be done using the dissimilarity matrix of the upper tdc (-log(upper_tdc)) Different methods for the clustering are tested
dissimilarity_diff_upper<- (1-upper_tdc)^0.5
upper_clustering_diff_average<-hclust(d=as.dist(dissimilarity_diff_upper), method="average")
upper_clustering_diff_complete <- hclust(d=as.dist(dissimilarity_diff_upper), method = "complete")
plot(upper_clustering_diff_average, main="Average")
abline(h=0.91, col="blue")
plot(upper_clustering_diff_complete, main="Complete")
abline(h=0.91, col="blue")
With average there are 5 groups
knitr::include_graphics("mappa_stazioni_rain_cluster_complete_utdc.png")
cluster11<-clean_rain[,c("Enemonzo", "Tarvisio")]
cluster12<-clean_rain[,c("Brugnera", "Vivaro")]
cluster13<-clean_rain[,c("Fagagna", "Gemona.del.Friuli", "Musi")]
cluster14<-clean_rain[,c("Talmassons", "Cividale.del.Friuli", "Udine.S.O.", "Palazzolo.dello.Stella","San.Vito.al.Tgl.", "Codroipo")]
cluster15<-clean_rain[,c("Fossalon", "Sgonico.Zgonik", "Gradisca.d.Is.", "Capriva.del.Friuli", "Cervignano.del.Friuli")]
Scatterplots of clusters K-plot and Heatmap of tdc in clusters:
pairs2(cluster11, main="cluster11")
heatmaply_cor(fitLambda(cluster11, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster11")
pairwise<-combn(colnames(cluster11), 2) #this create all the combinations of the stations (no repetitions)
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
heatmaply_cor(corKendall(cluster11), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster11")
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
pairs2(cluster12, main="cluster12")
heatmaply_cor(fitLambda(cluster12, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster12")
pairwise<-combn(colnames(cluster12), 2) #this create all the combinations of the stations (no repetitions)
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
heatmaply_cor(corKendall(cluster12), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster12")
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
pairs2(cluster13, main="cluster13")
heatmaply_cor(fitLambda(cluster13, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster13")
pairwise<-combn(colnames(cluster13), 2) #this create all the combinations of the stations (no repetitions)
heatmaply_cor(corKendall(cluster13), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster13")
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
pairs2(cluster14, main="cluster14")
heatmaply_cor(fitLambda(cluster14, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster14")
pairwise<-combn(colnames(cluster14), 2) #this create all the combinations of the stations (no repetitions)
heatmaply_cor(corKendall(cluster14), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster14")
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
pairs2(cluster15, main="cluster15")
heatmaply_cor(fitLambda(cluster15, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster15")
pairwise<-combn(colnames(cluster15), 2) #this create all the combinations of the stations (no repetitions)
heatmaply_cor(corKendall(cluster15), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster15")
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
tau_matrix<-corKendall(clean_rain)
dissimilarity_diff_ken<- (2*(1-tau_matrix))^0.5
ken_clustering_diff_average<-hclust(d=as.dist(dissimilarity_diff_ken), method="average")
ken_clustering_diff_complete <- hclust(d=as.dist(dissimilarity_diff_ken), method = "complete")
plot(ken_clustering_diff_average, main="Average")
plot(ken_clustering_diff_complete, main="Complete")
abline(h=1.1, col="blue")
knitr::include_graphics("mappa_stazioni_rain_cluster_complete_Kendall.png")
cluster1<-clean_rain[,c("Enemonzo", "Tarvisio", "Gemona.del.Friuli", "Musi")]
cluster2<-clean_rain[,c("Talmassons", "Cividale.del.Friuli", "Udine.S.O.", "Palazzolo.dello.Stella","San.Vito.al.Tgl.", "Codroipo", "Cervignano.del.Friuli", "Brugnera", "Vivaro", "Fagagna")]
cluster3<-clean_rain[,c("Fossalon", "Sgonico.Zgonik", "Gradisca.d.Is.", "Capriva.del.Friuli")]
pairs2(cluster1, main="cluster1")
heatmaply_cor(fitLambda(cluster1, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster1")
pairwise<-combn(colnames(cluster1), 2) #this create all the combinations of the stations (no repetitions)
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
heatmaply_cor(corKendall(cluster1), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster1")
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
pairs2(cluster2, main="cluster2")
heatmaply_cor(fitLambda(cluster2, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster2")
pairwise<-combn(colnames(cluster2), 2) #this create all the combinations of the stations (no repetitions)
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
heatmaply_cor(corKendall(cluster2), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster2")
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
pairs2(cluster13, main="cluster3")
heatmaply_cor(fitLambda(cluster3, lower.tail = FALSE), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster3")
pairwise<-combn(colnames(cluster3), 2) #this create all the combinations of the stations (no repetitions)
heatmaply_cor(corKendall(cluster3), xlab = "Stations",
ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster3")
par(mfrow=c(2,3)) #soluzione temporanea con piĂą plot
for (i in 1:(length(pairwise)/2)){
BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}
both exchangeable and radially symmetric
pairs2(cluster11, main="cluster11")
radSymTest(cluster11)$p.value
## Warning in radSymTest(cluster11): argument 'ties' set to TRUE
## [1] 0.3811189
exchTest(cluster11)$p.value
## Warning in exchTest(cluster11): argument 'ties' set to TRUE
## [1] 0.2052947
N<-1000
t.copula_cluster11 <- tCopula(dim = dim(cluster11)[2], dispstr = "un", df.fixed = TRUE)
gofCopula(t.copula_cluster11, x=cluster11, simulation="mult")
##
## Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
## 2, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.015887, parameter = 0.75132, p-value = 0.518
both exchangeable and radially symmetric
pairs2(cluster12, main="cluster11")
radSymTest(cluster12)$p.value
## Warning in radSymTest(cluster12): argument 'ties' set to TRUE
## [1] 0.1303696
exchTest(cluster12)$p.value
## Warning in exchTest(cluster12): argument 'ties' set to TRUE
## [1] 0.6568432
t.copula_cluster12 <- tCopula(dim = dim(cluster12)[2], dispstr = "un", df.fixed = TRUE)
gofCopula(t.copula_cluster12, x=cluster12, N=N, simulation="mult")
##
## Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
## 2, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.027536, parameter = 0.8245, p-value = 0.04545
both exchangeable and radially symmetric (but low p value on radial)
pairs2(cluster13, main="cluster13")
radSymTest(cluster13)$p.value
## Warning in radSymTest(cluster13): argument 'ties' set to TRUE
## [1] 0.01048951
exchTest(cluster13)$p.value
## Warning in exchTest(cluster13): argument 'ties' set to TRUE
## [1] 0.9675325
#both exchangeable and radially symmetric
t.copula_cluster13 <- tCopula(dim = dim(cluster13)[2], dispstr = "un", df.fixed = TRUE)
gofCopula(t.copula_cluster13, x=cluster13, simulation="mult")
##
## Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
## 3, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.044218, parameter1 = 0.74166, parameter2 = 0.65142,
## parameter3 = 0.84525, p-value = 0.06643
exchangeable and radially symmetric (lowish p-value)
pairs2(cluster14, main="cluster14")
radSymTest(cluster14)$p.value
## Warning in radSymTest(cluster14): argument 'ties' set to TRUE
## [1] 0.06343656
exchTest(cluster14)$p.value
## Warning in exchTest(cluster14): argument 'ties' set to TRUE
## [1] 0.9295704
t.copula_cluster14 <- tCopula(dim = dim(cluster14)[2], dispstr = "un", df.fixed = TRUE)
gofCopula(t.copula_cluster14, x=cluster14, N=N, simulation="mult")
##
## Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
## 6, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.042891, parameter1 = 0.75121, parameter2 = 0.82238,
## parameter3 = 0.85020, parameter4 = 0.77869, parameter5 = 0.81826,
## parameter6 = 0.85999, parameter7 = 0.65849, parameter8 = 0.67274,
## parameter9 = 0.76231, parameter10 = 0.71793, parameter11 = 0.75000,
## parameter12 = 0.84544, parameter13 = 0.75894, parameter14 = 0.76397,
## parameter15 = 0.84009, p-value = 0.5709
exchangeable (lowish p-value) but not radially symmetric. p-value on gof of t copula low but not under threshold…
pairs2(cluster15, main="cluster15")
radSymTest(cluster14)$p.value
## Warning in radSymTest(cluster14): argument 'ties' set to TRUE
## [1] 0.07342657
exchTest(cluster14)$p.value
## Warning in exchTest(cluster14): argument 'ties' set to TRUE
## [1] 0.9285714
t.copula_cluster15 <- tCopula(dim = dim(cluster15)[2], dispstr = "un", df.fixed = TRUE)
gofCopula(t.copula_cluster15, x=cluster15, N=N, simulation="mult")
##
## Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
## 5, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.076633, parameter1 = 0.68920, parameter2 = 0.76888,
## parameter3 = 0.70845, parameter4 = 0.76948, parameter5 = 0.71116,
## parameter6 = 0.66176, parameter7 = 0.60501, parameter8 = 0.87559,
## parameter9 = 0.84655, parameter10 = 0.77825, p-value = 0.07343
The gof seems to not reject the t-copula hypothesis for any of the clusters except for cluster 12 in which the p-value oscillate around 0.045-0.051 based on the fit; maybe try cross validation instead of gof…